EPA NEI Emissions Trends

Pa-Shun Hawkins & Robert J. Dellinger

Summer Research Project 2024

NEI Emissions Data

Loading and Plotting the Distribution of Facilities

# Define the file path
file_path <- here::here("Data", "EPA_NEI_National_State_Trends", "NEI_2020_Acidfying_Gases_States_of_Interest.xlsx")

# Load the data from the first sheet
acid_gases_data <- read_excel(file_path)

# If you need to load a specific sheet, specify it like this:
# acid_gases_data <- read_excel(file_path, sheet = "Sheet1")

# Extract Latitude and Longitude from the Lat/Lon column
acid_gases_data <- acid_gases_data %>%
  mutate(
    Longitude = as.numeric(gsub("\\[|\\]|,.*", "", `Lat/Lon`)),
    Latitude = as.numeric(gsub("\\[|\\]|.*,", "", `Lat/Lon`)),
    .keep = "unused"
  )

# Convert to an sf object
acid_gases_sf <- acid_gases_data %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)

# Load state boundaries
states <- states(cb = TRUE) %>% suppressMessages()

# Plot the emissions data with state boundaries, faceted by Pollutant
ggplot() +
  geom_sf(data = states, fill = NA, color = "black") +  # Add state boundaries
  geom_sf(data = acid_gases_sf, aes(size = `Emissions (Tons)`, color = Pollutant),
          alpha = 0.7, size=0.2)+
  theme_minimal() +
  coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) +  # Adjust the coordinate limits if needed
  labs(title = "Acidifying Gas Emissions by Facility",
       subtitle = "Locations of Point Source Facilities",
       color = "Pollutant", size = "Emissions (Tons)") +
  theme(
    legend.position = "bottom",  # Move legend to the bottom
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.box = "horizontal"  # Arrange the legends horizontally
  ) +
  facet_wrap(~ Pollutant)  # Keep faceting by Pollutant

# Filter the data for SO2 emissions
SO2_data <- acid_gases_data %>%
  filter(Pollutant == "Sulfur Dioxide")

# Convert to an sf object
SO2_sf <- SO2_data %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)

# Plot the SO2 emissions data with state boundaries
ggplot() +
  geom_sf(data = states, fill = NA, color = "black") +  # Add state boundaries
  geom_sf(data = SO2_sf, aes(size = `Emissions (Tons)`), color = "royalblue", alpha = 0.7) +
  theme_minimal() +
  coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) +  # Adjust the coordinate limits if needed
  scale_size_continuous(range = c(1, 5), limits = c(0, 30000)) +  # Adjust the point size based on emission tons
  labs(title = "Sulfur Dioxide (SO₂) Emissions by Facility",
       subtitle = "Distribution and Intensity of Emissions",
       size = "Emissions (Tons)") +
  theme(
    legend.position = "bottom",  # Move legend to the bottom
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.box = "horizontal"  # Arrange the legends horizontally
  )

# Filter the data for NOₓ emissions
NOx_data <- acid_gases_data %>%
  filter(Pollutant == "Nitrous Oxide")

# Convert to an sf object
NOx_sf <- NOx_data %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)

# Plot the NOₓ emissions data with state boundaries
ggplot() +
  geom_sf(data = states, fill = NA, color = "black") +  # Add state boundaries
  geom_sf(data = NOx_sf, aes(size = `Emissions (Tons)`), color = "lightgreen", alpha = 0.7) +
  theme_minimal() +
  coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) +  # Adjust the coordinate limits if needed
  scale_size_continuous(range = c(1, 5), limits = c(0, 1000)) +  # Adjust the point size based on emission tons
  labs(title = "Nitrous Oxide (NOx) Emissions by Facility",
       subtitle = "Distribution and Intensity of Emissions",
       size = "Emissions (Tons)") +
  theme(
    legend.position = "bottom",  # Move legend to the bottom
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.box = "horizontal"  # Arrange the legends horizontally
  )

# Filter the data for Ammonia emissions
Ammonia_data <- acid_gases_data %>%
  filter(Pollutant == "Ammonia")

# Convert to an sf object
Ammonia_sf <- Ammonia_data %>%
  st_as_sf(coords = c("Longitude", "Latitude"), crs = 4326, remove = FALSE)

# Plot the Ammonia emissions data with state boundaries
ggplot() +
  geom_sf(data = states, fill = NA, color = "black") +  # Add state boundaries
  geom_sf(data = Ammonia_sf, aes(size = `Emissions (Tons)`), color = "orangered", alpha = 0.7) +
  theme_minimal() +
  coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) +  # Adjust the coordinate limits if needed
  scale_size_continuous(range = c(1, 5), limits = c(0, 1200)) +  # Adjust the point size based on emission tons
  labs(title = "Ammonia (NH₄) Emissions by Facility",
       subtitle = "Distribution and Intensity of Emissions",
       size = "Emissions (Tons)") +
  theme(
    legend.position = "bottom",  # Move legend to the bottom
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.box = "horizontal"  # Arrange the legends horizontally
  )

# Emissions Rankings by State, County, & Industry

# Group by State and Pollutant, then calculate total emissions and count the number of facilities
state_facilities_ranking <- acid_gases_data %>%
  group_by(State, Pollutant) %>%
  summarize(
    Facility_Count = n(),  # Count the number of facilities
    Total_Emissions = sum(`Emissions (Tons)`, na.rm = TRUE)  # Sum the emissions
  ) %>%
  arrange(desc(Total_Emissions))
## `summarise()` has grouped output by 'State'. You can override using the
## `.groups` argument.
# Print the result
print(state_facilities_ranking)
## # A tibble: 21 × 4
## # Groups:   State [7]
##    State         Pollutant      Facility_Count Total_Emissions
##    <chr>         <chr>                   <int>           <dbl>
##  1 Ohio          Sulfur Dioxide            932         105750.
##  2 Indiana       Sulfur Dioxide            381          70739.
##  3 Illinois      Sulfur Dioxide           2205          61748.
##  4 Pennsylvania  Sulfur Dioxide           1239          48814.
##  5 Kentucky      Sulfur Dioxide           1034          48083.
##  6 West Virginia Sulfur Dioxide            206          37510.
##  7 Tennessee     Sulfur Dioxide            453          16735.
##  8 Ohio          Ammonia                   393           3630.
##  9 Ohio          Nitrous Oxide             205           2102.
## 10 Pennsylvania  Ammonia                   644           1770.
## # ℹ 11 more rows
# Group by State-County and Pollutant, then calculate total emissions and count the number of facilities
county_facilities_ranking <- acid_gases_data %>%
  group_by(`State-County`, Pollutant) %>%
  summarize(
    Facility_Count = n(),  # Count the number of facilities
    Total_Emissions = sum(`Emissions (Tons)`, na.rm = TRUE)  # Sum the emissions
  ) %>%
  arrange(desc(Total_Emissions))
## `summarise()` has grouped output by 'State-County'. You can override using the
## `.groups` argument.
# Print the result
print(county_facilities_ranking)
## # A tibble: 1,407 × 4
## # Groups:   State-County [573]
##    `State-County`  Pollutant      Facility_Count Total_Emissions
##    <chr>           <chr>                   <int>           <dbl>
##  1 OH - Gallia     Sulfur Dioxide              4          31481.
##  2 OH - Hamilton   Sulfur Dioxide             47          16875.
##  3 IN - Gibson     Sulfur Dioxide              3          13394.
##  4 PA - Armstrong  Sulfur Dioxide             12          13015.
##  5 PA - Indiana    Sulfur Dioxide             13          12659.
##  6 IN - Lake       Sulfur Dioxide             44          11789.
##  7 OH - Jefferson  Sulfur Dioxide              9          11651.
##  8 IN - Porter     Sulfur Dioxide             15          10736.
##  9 IL - Macon      Sulfur Dioxide             32           9846.
## 10 IL - Washington Sulfur Dioxide              5           9779.
## # ℹ 1,397 more rows
# Group by NAICS and Pollutant, then calculate total emissions and count the number of facilities
industry_facilities_ranking <- acid_gases_data %>%
  group_by(NAICS, Pollutant) %>%
  summarize(
    Facility_Count = n(),  # Count the number of facilities
    Total_Emissions = sum(`Emissions (Tons)`, na.rm = TRUE)  # Sum the emissions
  ) %>%
  arrange(desc(Total_Emissions))
## `summarise()` has grouped output by 'NAICS'. You can override using the
## `.groups` argument.
# Print the result
print(industry_facilities_ranking)
## # A tibble: 1,321 × 4
## # Groups:   NAICS [511]
##    NAICS                                Pollutant Facility_Count Total_Emissions
##    <chr>                                <chr>              <int>           <dbl>
##  1 Fossil Fuel Electric Power Generati… Sulfur D…            314         259057.
##  2 All Other Petroleum and Coal Produc… Sulfur D…             16          23778.
##  3 Iron and Steel Mills and Ferroalloy… Sulfur D…             72          12295.
##  4 Wet Corn Milling and Starch Manufac… Sulfur D…             13          11806.
##  5 Solid Waste Landfill                 Sulfur D…            192          10961.
##  6 Cement Manufacturing                 Sulfur D…             20           9371.
##  7 Electric Bulk Power Transmission an… Sulfur D…              6           8611.
##  8 Lime Manufacturing                   Sulfur D…             10           8028.
##  9 Alumina Refining and Primary Alumin… Sulfur D…              3           7769.
## 10 Fossil Fuel Electric Power Generati… Nitrous …            248           4583.
## # ℹ 1,311 more rows

County Emissions

# Download county boundaries for the entire U.S.
counties <- counties(cb = TRUE, resolution = "5m", year=2020) %>% 
  st_as_sf()  # Convert to an sf object

# Merge the county facilities ranking with the spatial county boundaries using the FIPS code
county_map_data <- counties %>%
  left_join(acid_gases_data %>%
              group_by(FIPS, Pollutant) %>% 
              summarize(
                Facility_Count = n(),
                Total_Emissions = sum(`Emissions (Tons)`, na.rm = TRUE)
              ), by = c("GEOID" = "FIPS"))
## `summarise()` has grouped output by 'FIPS'. You can override using the
## `.groups` argument.
# Check the merged data
head(county_map_data)
## Simple feature collection with 6 features and 15 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -115 ymin: 30.7 xmax: -85.5 ymax: 49
## Geodetic CRS:  NAD83
##   STATEFP COUNTYFP COUNTYNS       AFFGEOID GEOID       NAME          NAMELSAD
## 1      04      023 00040472 0500000US04023 04023 Santa Cruz Santa Cruz County
## 2      12      059 00295735 0500000US12059 12059     Holmes     Holmes County
## 3      27      081 00659486 0500000US27081 27081    Lincoln    Lincoln County
## 4      30      029 01719611 0500000US30029 30029   Flathead   Flathead County
## 5      31      135 00835889 0500000US31135 31135    Perkins    Perkins County
## 6      38      075 01034229 0500000US38075 38075   Renville   Renville County
##   STUSPS   STATE_NAME LSAD       ALAND    AWATER Pollutant Facility_Count
## 1     AZ      Arizona   06  3201853240   3068237      <NA>             NA
## 2     FL      Florida   06  1240232910  26393540      <NA>             NA
## 3     MN    Minnesota   06  1390265112  30167787      <NA>             NA
## 4     MT      Montana   06 13175844498 437162491      <NA>             NA
## 5     NE     Nebraska   06  2287768800   2840176      <NA>             NA
## 6     ND North Dakota   06  2272050275  40658499      <NA>             NA
##   Total_Emissions                       geometry
## 1              NA MULTIPOLYGON (((-111.4 31.5...
## 2              NA MULTIPOLYGON (((-86.04 30.9...
## 3              NA MULTIPOLYGON (((-96.45 44.2...
## 4              NA MULTIPOLYGON (((-115 48.23,...
## 5              NA MULTIPOLYGON (((-102.1 41, ...
## 6              NA MULTIPOLYGON (((-102 48.81,...
# Ensure the 'NA' values in the Pollutant column are excluded
county_map_data <- county_map_data %>%
  dplyr::filter(!is.na(Pollutant))

# Function to plot each pollutant separately with a more visible gradient
plot_pollutant_separately <- function(data, variable, title, subtitle) {
  unique_pollutants <- unique(data$Pollutant)
  
  plots <- lapply(unique_pollutants, function(pollutant) {
    p_data <- data %>% dplyr::filter(Pollutant == pollutant)
    
    # Calculate the range of the data for the current pollutant
    value_range <- range(p_data[[variable]], na.rm = TRUE)
    
    ggplot(data = p_data) +
      geom_sf(aes_string(fill = variable)) +
      scale_fill_viridis_c(
        option = "C", 
        limits = value_range,  # Use the range of the current data
        trans = "log",  # Apply a logarithmic transformation for better visibility
        breaks = scales::pretty_breaks(n = 5)  # Adjust the number of breaks in the gradient
      ) +
      coord_sf(xlim = c(-92.5, -72.5), ylim = c(34, 44)) +  # Adjust the coordinate limits if needed
      labs(title = paste(title, "-", pollutant),
           subtitle = subtitle,
           fill = variable) +
      theme_minimal()
  })
  
  return(plots)
}

# Plot the Facility Count by County separately for each pollutant
facility_count_plots <- plot_pollutant_separately(
  county_map_data,
  "Facility_Count",
  "Facility Count by County",
  "Total number of facilities per county"
)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Plot the Total Emissions by County separately for each pollutant
total_emissions_plots <- plot_pollutant_separately(
  county_map_data,
  "Total_Emissions",
  "Total Emissions by County",
  "Total emissions in tons per county"
)

# Display the plots
facility_count_plots[[1]]

facility_count_plots[[2]]

facility_count_plots[[3]]

total_emissions_plots[[1]]

total_emissions_plots[[2]]

total_emissions_plots[[3]]